import re

# 1. Open the source files and read their contents.
# `with` guarantees each handle is closed even if read() raises (for example
# on a decoding error), which the manual open()/close() pairing did not.
with open(r"D://D盘桌面/(完整版)高考英语3500词汇表中英文.txt", mode='r', encoding='utf-8') as f:
    senior = f.read()  # full text of the senior (gaokao) vocabulary file
with open(r"D://D盘桌面/2018中考英语必备1600个词汇.txt", mode='r', encoding='utf-8') as f:
    junior = f.read()  # full text of the junior (zhongkao) vocabulary file

# 2. Parse the raw text and extract the words.
# Senior file: following a newline, skip up to a "." and capture the text
# between that "." and the next " [".
# NOTE(review): presumably entries look like "<index>.<word> [<phonetic>]";
# because the pattern starts with "\n", text before the first newline of the
# file is never matched -- confirm against the data file.
obj1 = re.compile(r'''\n.*?\.(?P<word>.*?) \[''')
# With exactly one capturing group, findall() yields that group's text for
# every match.
seniorLI = obj1.findall(senior)
print(seniorLI)
print('\n')

# Junior file: capture the text between a newline and the first space after
# it (the original author notes that the string ends with a space).
obj2 = re.compile(r'''\n(?P<word>.*?) ''')
juniorLI = obj2.findall(junior)
print(juniorLI)
print('\n')

# 3. Remove duplicate words to obtain the final list
def is_eng(strs: str) -> bool:
    """Return True if every character of *strs* is an ASCII letter.

    Only ``a``-``z`` and ``A``-``Z`` count as letters; digits, spaces,
    punctuation and non-ASCII characters make the result False.  An empty
    string contains no offending character and therefore yields True.
    """
    # Imported locally so the function stays self-contained, as before.
    import string

    # ascii_letters is exactly ascii_lowercase + ascii_uppercase, so the
    # alphabet no longer has to be concatenated once per character.
    letters = string.ascii_letters
    return all(ch in letters for ch in strs)
# Keep only the senior words that consist purely of English letters and do
# not already appear in the junior list.  The list is rebuilt instead of
# calling seniorLI.remove() inside a loop over seniorLI: removing during
# iteration shifts the remaining items left, so the element following each
# removed one was never examined and unwanted entries survived.
junior_words = set(juniorLI)  # constant-time membership tests
seniorLI[:] = [
    word for word in seniorLI
    if word not in junior_words and is_eng(word)
]
print(seniorLI)

# 4. Export the word list to a file, one word per line.
# `with` closes the file even if a write fails; the explicit UTF-8 encoding
# matches the input files instead of depending on the platform default.
with open("D://D盘桌面/高考词汇（除中考外）.txt", mode='w', encoding='utf-8') as f:
    f.writelines(word + '\n' for word in seniorLI)
# Report the number of words actually written (the counter previously started
# at -1 and therefore under-reported by one).
num = len(seniorLI)
print("当前查找到单词： %s个" % num)

# 5. Done.
print('over')
